

# Tweets with extracted city mentions; keep only tweets whose first city slot
# (`city_1`) is filled.
tweets <- filter(readRDS("temp/tweets_w_cities.rds"), !is.na(city_1))

# One row per (tweet, mentioned city): every `city_*` column is stacked into
# `city`, the originating column name is kept in `count`, and empty slots are
# dropped.
tweets_long <- tweets |>
  select(id, created_at, starts_with("city_")) |>
  pivot_longer(
    cols = starts_with("city_"),
    names_to = "count",
    values_to = "city",
    values_drop_na = TRUE
  )


# City name -> GEOID lookup. Names are normalised in a fixed order: English
# stop words removed, punctuation and digits stripped, lower-cased, and runs
# of whitespace collapsed. Note that stop-word removal runs before
# lower-casing.
geos <- fread("raw_data/city_geoid.csv") |>
  mutate(
    city = city |>
      removeWords(stopwords("english")) |>
      removePunctuation() |>
      removeNumbers() |>
      tolower() |>
      stripWhitespace()
  )

# Alias lookup (`name` -> `city`); both columns get the same normalisation as
# `geos$city` so the two tables can be matched on cleaned names.
abbr <- fread("raw_data/city_abbr.csv") |>
  mutate(
    across(
      c(name, city),
      \(x) x |>
        removeWords(stopwords("english")) |>
        removePunctuation() |>
        removeNumbers() |>
        tolower() |>
        stripWhitespace()
    )
  )


# Resolve every mentioned city to a canonical name from `geos`.
#   * Mentions that already match a `geos` city are kept unchanged.
#   * Every other mention is looked up in `abbr` by its `name` column and
#     replaced with the matching `abbr$city`; mentions with no match end up
#     with `city = NA`.
# Afterwards each (tweet id, city) pair is kept once (first occurrence).
#
# `across()` gets an explicit `everything()` because omitting `.cols` is
# deprecated in dplyr >= 1.1.0. `distinct()` replaces the
# `group_by() |> filter(row_number() == 1)` idiom: it keeps the same rows in
# the same order but avoids building one group per (id, city) pair, and
# returns an ungrouped table.
known_cities <- tolower(geos$city)

abbr_lookup <- abbr |>
  mutate(across(everything(), tolower)) |>
  rename(x = city)

tweets_long <- rbind(
  filter(tweets_long, city %in% known_cities),
  filter(tweets_long, !(city %in% known_cities)) |>
    left_join(abbr_lookup, by = c("city" = "name")) |>
    mutate(city = x) |>
    select(-x)
) |>
  distinct(id, city, .keep_all = TRUE)

# Mentions per city and calendar day. `pre_election` is TRUE when
# `created_at <= "2020-11-03"`; how that comparison treats the time of day
# depends on the type of `created_at`, which is not visible here.
city_count <- tweets_long |>
  mutate(
    pre_election = created_at <= "2020-11-03",
    date = date(created_at)
  ) |>
  group_by(city, pre_election, date) |>
  summarise(n = n())

# Cache the daily counts on disk.
saveRDS(city_count, file = "temp/counts_by_city_day.rds")

# Reload the daily counts and collapse them to one row per city with separate
# pre- and post-election totals plus their sum `n`.
#
# The period columns are named from the data rather than assigned by position.
# `pivot_wider()` orders new columns by first appearance, so a positional
# `colnames<-` silently swaps pre/post whenever the first city in the data has
# only pre-election tweets (and fails outright when only one period exists).
# Cities missing one period get 0 via `values_fill`.
city_count <- readRDS("temp/counts_by_city_day.rds") |>
  mutate(period = if_else(pre_election, "pre_election", "post_election")) |>
  group_by(city, period) |>
  summarize(n = sum(n), .groups = "drop") |>
  pivot_wider(
    id_cols = city,
    names_from = period,
    values_from = n,
    values_fill = 0
  ) |>
  # Keep the historical column order; this also errors clearly if either
  # period is absent from the data.
  relocate(city, post_election, pre_election) |>
  mutate(n = pre_election + post_election)

# Names listed in raw_data/exclude.csv are dropped below. The file is read
# once up front; inside `filter()` on a grouped table the read would be
# repeated for every group.
excluded_cities <- tolower(fread("raw_data/exclude.csv")$city)

# Attach GEOID / state to every city (keeping `geos` cities that were never
# mentioned, with zero pre/post counts), drop excluded names, and collapse to
# one row per (city, state_name, GEOID).
city_count <- full_join(city_count, geos, by = "city") |>
  mutate(across(
    c(starts_with("pre_election"), starts_with("post_election")),
    \(x) ifelse(is.na(x), 0, x)
  )) |>
  rename(GEOID = geoid) |>
  filter(!(tolower(city) %in% excluded_cities)) |>
  # Every row with GEOID 3651000 is relabelled "new york" so alternative
  # spellings are summed together. NOTE(review): rows with a missing GEOID get
  # `city = NA` here because `NA == 3651000` is NA; they are all pooled into a
  # single NA-city row by the summarise below.
  mutate(city = ifelse(GEOID == 3651000, "new york", city)) |>
  group_by(city, state_name, GEOID) |>
  # Lambda form: passing `na.rm` through `across(...)` is deprecated in
  # dplyr >= 1.1.0.
  summarize(
    across(c(n, pre_election, post_election), \(x) sum(x, na.rm = TRUE)),
    .groups = "drop_last"
  )

# Place-level 2020 census tables from the project's census_* helpers.
race <- census_race_ethnicity("place", 2020)
income <- census_income("place", 2020)
education <- census_education("place", 2020)
age <- census_median_age("place", 2020)

# Fold the four tables onto city_count one after another (race, income,
# education, age). Each table's GEOID is cast to numeric first; the join keys
# are left for left_join() to infer from the shared column names.
city_count <- Reduce(
  function(joined, census_tbl) {
    left_join(joined, mutate(census_tbl, GEOID = as.numeric(GEOID)))
  },
  list(race, income, education, age),
  city_count
)

# Project helper; presumably clears the workspace except the named objects --
# TODO confirm against its definition.
cleanup(c("city_count", "tweets"))

# Compute, for one state's 2020 shapefile, the two-party Democratic vote share
# of every Census place and cache it on disk.
#
# Args:
#   s: path to a directory holding one shapefile. The first two characters of
#      the first file name in the directory are used as the state identifier,
#      and that file name minus its last four characters is the layer name.
#
# Side effect: writes temp/city_demshare_<state>.rds with one row per place
#   (`place`, `share_dem`), where
#   share_dem = sum(G20PREDBID) / sum(G20PREDBID + G20PRERTRU).
# Returns: the (invisible NULL) value of saveRDS().
assess_2020 <- function(s){
  # Packages are attached here because the function is shipped to parallel
  # workers, which start with a fresh session.
  library(tigris)
  library(rgdal)
  library(sf)
  library(sp)
  library(rgeos)
  library(SearchTrees)
  library(raster)
  library(data.table)
  library(tidyverse)

  shp_file <- list.files(s)[1]
  state <- substring(shp_file, 1, 2)
  print(state)

  # Place polygons for this state
  places_t <- tigris::places(state = state, class = "sp")

  # Vote-return polygons, re-projected to NAD83 lon/lat
  bgs <- readOGR(dsn = s, layer = substring(shp_file, 1, nchar(shp_file) - 4))
  bgs <- spTransform(bgs, CRS("+proj=longlat +datum=NAD83 +no_defs"))

  # Attribute table plus each polygon's centroid coordinates. Thousands
  # separators are stripped BEFORE the numeric conversion: converting first
  # turns values such as "1,234" into NA, which a later gsub() cannot repair.
  bgs <- cbind(bgs@data,
               gCentroid(bgs, byid = TRUE)@coords) |>
    rename(INTPTLON = x,
           INTPTLAT = y) |>
    mutate(GEOID = paste0(state, row_number()),
           across(c(G20PREDBID, G20PRERTRU), ~ as.numeric(gsub(",", "", .))))

  # Assign every polygon to the place that contains its centroid
  pings <- SpatialPoints(bgs[, c("INTPTLON", "INTPTLAT")],
                         proj4string = places_t@proj4string)

  bgs$place <- over(pings, places_t)$GEOID

  # Polygons whose centroid falls outside every place are dropped
  bgs <- bgs |>
    filter(!is.na(place)) |>
    group_by(place) |>
    summarize(share_dem = sum(G20PREDBID) / sum(G20PREDBID + G20PRERTRU))

  saveRDS(bgs, paste0("temp/city_demshare_", state, ".rds"))
}

# Run assess_2020() over the per-state VEST directories on 8 workers.
cl <- makeCluster(8)
registerDoParallel(cl)

clusterExport(cl, "assess_2020")

# Sub-directories of the VEST folder (the first entry returned by list.dirs()
# is the folder itself). The index keeps entries 1-32 and 34-52 and skips the
# 33rd; the reason is not recorded in this script -- TODO confirm.
vest_dirs <- list.dirs("raw_data/vest/vest_2020")
runs <- vest_dirs[-1][c(1:32, 34:52)]

# assess_2020() is called for its side effect (one RDS per state). The
# cluster is shut down even if a worker fails, so no R processes are left
# running.
invisible(tryCatch(
  parLapply(cl, runs, fun = assess_2020),
  finally = stopCluster(cl)
))

# Collect the per-state outputs. `pattern` is a regular expression, not a
# glob, so the wildcard is spelled `.*` and the extension is anchored.
files <- list.files(path = "temp/", pattern = "^city_demshare_.*\\.rds$",
                    full.names = TRUE)

all_bgs <- rbindlist(lapply(files, readRDS))

city_count <- left_join(
  city_count,
  all_bgs |>
    mutate(GEOID = as.numeric(place)) |>
    dplyr::select(-place),
  by = "GEOID"
)

# When several rows share a city name, keep the one with the largest
# population (rows with missing population sort last). The result stays
# grouped by city.
city_count <- city_count |>
  arrange(-population) |>
  group_by(city) |>
  filter(row_number() == 1)

saveRDS(city_count, "temp/city_count_pre_reg.rds")
###############
# Reload the city-level file, drop "washington", and convert the three
# mention counts to mentions per 1,000 residents.
city_count <- readRDS("temp/city_count_pre_reg.rds") |>
  filter(city != "washington") |>
  mutate(across(c(n, pre_election, post_election), ~ (. / population) * 1000))

# Display table, sorted by post-election mentions per 1,000 residents and
# formatted for LaTeX (escaped percent sign, comma-formatted rates).
tab <- city_count |>
  arrange(desc(post_election)) |>
  ungroup() |>
  mutate(city = str_to_title(city),
         across(c(n, pre_election, post_election), ~ scales::comma(., accuracy = .1)),
         nh_black = paste0(format(100 * nh_black, digits = 2), "\\%")) |>
  select(City = city,
         State = state_name,
         `Total Mentions` = n,
         `Pre-Election Mentions` = pre_election,
         `Post-Election Mentions` = post_election,
         `Share Black` = nh_black)

# Shade every even-numbered row. Vectorised so that a table with fewer than
# two rows is handled correctly (`2:nrow(tab)` would count downwards).
shaded <- seq_len(nrow(tab)) %% 2 == 0
tab$City[shaded] <- paste0("\\rowcolor{Gray}", tab$City[shaded])

# Top-10 table
kable(head(tab, 10),
      "latex", caption = "\\label{tab:city-list} Most Frequently Mentioned Municipalities\\\\Mentions per Thousand Residents",
      linesep = "", align = c("l", rep("c", 6)),
      booktabs = TRUE, escape = FALSE) |>
  column_spec(c(1:6), width = "2.5cm") |>
  kable_styling(latex_options = c("scale_down", "HOLD_position")) |>
  save_kable("temp/city_list.tex")

# Full table as CSV. NOTE(review): this is written after the row shading
# above, so the City and Share Black columns contain the LaTeX markup.
fwrite(tab, "temp/city_list.csv")

# Slim top-5 table; head() avoids NA padding rows when fewer than five
# cities are available.
kable(select(head(tab, 5), `City`,
             `Post-Election Mentions`,
             `Share Black`), "latex",
      linesep = "", align = c("l", rep("c", 2)),
      booktabs = TRUE, escape = FALSE) |>
  kable_styling(latex_options = c("scale_down", "HOLD_position")) |>
  save_kable("temp/city_list_slim.tex")

# Per-capita regressions with state fixed effects and iid standard errors.
# The three models share one right-hand side and differ only in the outcome:
# all mentions, pre-election mentions, post-election mentions.
# (m1 used to be estimated twice with an identical call; it is fitted once.)
m1 <- feols(n ~ nh_black + nh_white + share_dem +
              median_age + median_income + some_college |
              state_name, city_count, vcov = "iid")

m2 <- feols(pre_election ~ nh_black + nh_white + share_dem +
              median_age + median_income + some_college |
              state_name, city_count, vcov = "iid")

m3 <- feols(post_election ~ nh_black + nh_white + share_dem +
              median_age + median_income + some_college |
              state_name, city_count, vcov = "iid")

# Extra table row marking the state fixed effects
rows <- tribble(~term, ~m1,  ~m2, ~m3,
                "State Fixed Effects", "$\\checkmark$", "$\\checkmark$", "$\\checkmark$")

# Insert it as row 13 -- presumably directly below the six coefficients
# (an estimate row plus a standard-error row each).
attr(rows, "position") <- 13L

modelsummary(list("All Mentions" = m1,
                  "Pre-Election Mentions" = m2,
                  "Post-Election Mentions" = m3),
             stars = c("*" = 0.05, "**" = 0.01, "***" = 0.001),
             coef_map = c("nh_black" = "Share Non-Hispanic Black",
                          "nh_white" = "Share Non-Hispanic White",
                          "share_dem" = "Biden Vote Share, 2020",
                          "median_age" = "Median Age",
                          "median_income" = "Median Income",
                          "some_college" = "Share with Some College"),
             gof_omit = 'DF|Deviance|AIC|BIC|Within|Pseudo|Log|Std|FE|RMSE',
             add_rows = rows,
             title = "\\label{tab:twitter-regs} Twitter Municipal Regressions (Mentions per 1k residents)",
             output = "latex",
             escape = FALSE) |>
  kableExtra::save_kable("temp/twitter_regs.tex")

##########################
##########################
##########################
##########################
##########################

# Alternative specification: raw mention counts as outcomes, with
# log(population) added as a control.
#
# The file reloaded here was saved before any per-capita scaling, so `n`,
# `pre_election` and `post_election` are already raw counts. The earlier
# `(. / 1000) * population` step was removed for two reasons: it referenced
# `pre_election_with_retweets` / `post_election_with_retweets`, which this
# script never creates (so `across()` errored), and it "undid" a per-1,000
# scaling that had not been applied to this copy of the data.
# NOTE(review): confirm that raw counts are the intended outcome.
city_count <- readRDS("temp/city_count_pre_reg.rds") |>
  filter(city != "washington")

m1 <- feols(n ~ nh_black + nh_white + share_dem +
              median_age + median_income + some_college + I(log(population)) |
              state_name, city_count, vcov = "iid")

m2 <- feols(pre_election ~ nh_black + nh_white + share_dem +
              median_age + median_income + some_college + I(log(population)) |
              state_name, city_count, vcov = "iid")

m3 <- feols(post_election ~ nh_black + nh_white + share_dem +
              median_age + median_income + some_college + I(log(population)) |
              state_name, city_count, vcov = "iid")

# Extra table row marking the state fixed effects
rows <- tribble(~term, ~m1,  ~m2, ~m3,
                "State Fixed Effects", "$\\checkmark$", "$\\checkmark$", "$\\checkmark$")

# Insert it as row 15 -- presumably directly below the seven coefficients
# (an estimate row plus a standard-error row each).
attr(rows, "position") <- 15L

modelsummary(list("All Mentions" = m1,
                  "Pre-Election Mentions" = m2,
                  "Post-Election Mentions" = m3),
             stars = c("*" = 0.05, "**" = 0.01, "***" = 0.001),
             coef_map = c("nh_black" = "Share Non-Hispanic Black",
                          "nh_white" = "Share Non-Hispanic White",
                          "share_dem" = "Biden Vote Share, 2020",
                          "median_age" = "Median Age",
                          "median_income" = "Median Income",
                          "some_college" = "Share with Some College",
                          "I(log(population))" = "Log(Population)"),
             gof_omit = 'DF|Deviance|AIC|BIC|Within|Pseudo|Log|Std|FE|RMSE',
             add_rows = rows,
             title = "\\label{tab:twitter-regs-alt} Twitter Municipal Regressions",
             output = "latex",
             escape = FALSE) |>
  kableExtra::save_kable("temp/twitter_regs_alt.tex")

####################